import pandas as pd
import seaborn as sns
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
For this exercise, we have written the following code to load the stock dataset built into plotly express.
# Load the stock dataset that ships with plotly express.
stocks = px.data.stocks()
# Preview the first rows to check the available columns.
stocks.head()
| date | GOOG | AAPL | AMZN | FB | NFLX | MSFT | |
|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 2018-01-08 | 1.018172 | 1.011943 | 1.061881 | 0.959968 | 1.053526 | 1.015988 |
| 2 | 2018-01-15 | 1.032008 | 1.019771 | 1.053240 | 0.970243 | 1.049860 | 1.020524 |
| 3 | 2018-01-22 | 1.066783 | 0.980057 | 1.140676 | 1.016858 | 1.307681 | 1.066561 |
| 4 | 2018-01-29 | 1.008773 | 0.917143 | 1.163374 | 1.018357 | 1.273537 | 1.040708 |
Select a stock and create a suitable plot for it. Make sure the plot is readable and shows the relevant information, such as dates and values.
# Import the ticker module before its first use so the cell runs top to bottom.
import matplotlib.ticker as ticker

# Locators control where ticks are placed. Attach them to an axis with
# ax.xaxis.set_major_locator(...) / ax.xaxis.set_minor_locator(...)
# (or the ax.yaxis equivalents).

# Series to plot: the date column against the GOOG column.
x = stocks['date']
y = stocks['GOOG']

# Create the figure first, then take its axes, so the locator below is applied
# to the axes that is actually drawn and no stray empty figure is left behind.
plt.figure(figsize=(12, 9))
ax = plt.axes()

# Put a major tick every 12 axis units to thin out the crowded date labels.
# NOTE(review): assumes the dates are plotted one position per row — confirm.
ax.xaxis.set_major_locator(ticker.MultipleLocator(12))

plt.plot(x, y)
plt.xlabel('date')
plt.ylabel('stock value')
plt.title('Google stock')
plt.show()
# YOUR CODE HERE
You've already plotted data for one stock. You can also plot several stocks in the same figure to support comparison.
To highlight different lines, customise line styles, markers, colors and include a legend to the plot.
# YOUR CODE HERE
# Compare every stock in a single figure. Each series gets its own line style,
# marker and legend label; matplotlib's default colour cycle supplies the colours.
x = stocks['date']

plt.figure(figsize=(12, 9))
ax = plt.axes()
# Put a major tick every 12 axis units to thin out the crowded date labels.
ax.xaxis.set_major_locator(ticker.MultipleLocator(12))

line_styles = ['-', '--', '-.', ':']
markers = ['o', 's', '^', 'D', 'v', 'x']

# Every column after the first ('date') is one stock's series.
for idx, name in enumerate(stocks.columns[1:]):
    plt.plot(
        x,
        stocks[name],
        linestyle=line_styles[idx % len(line_styles)],
        marker=markers[idx % len(markers)],
        markevery=8,  # draw a marker on every 8th point only, to limit clutter
        label=name,   # the legend is built from these labels
    )

plt.xlabel('date')
plt.ylabel('stock value')
plt.title('Stock comparison')
plt.legend()
plt.show()
First, load the tips dataset
# Load the 'tips' dataset through seaborn.
tips = sns.load_dataset('tips')
# Preview the first rows to check the available columns.
tips.head()
| total_bill | tip | sex | smoker | day | time | size | |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
Let's explore this dataset. Pose a question and create a plot that helps you answer it.
Some possible questions:
# YOUR CODE HERE
# Question: do male and female customers tip differently?
# Draw the tip distribution for each day, with one violin per sex.
sns.violinplot(
    data=tips,
    x='day',
    y='tip',
    hue='sex',
)
<AxesSubplot:xlabel='day', ylabel='tip'>
Males are found to give more tips.
# Question: which attribute correlates most with the tip?
# Pairwise scatter plots with KDE curves on the diagonal, coloured by sex.
sns.set_style("ticks")
sns.pairplot(
    tips,
    kind="scatter",
    diag_kind="kde",
    hue='sex',
    palette="husl",
)
plt.show()
Redo the above exercises (challenges 2 & 3) with plotly express. Create diagrams which you can interact with.
Hints:
# Reload the plotly express stock dataset for the plotly version of the exercise.
stocks = px.data.stocks()
# Display the table.
stocks
| date | GOOG | AAPL | AMZN | FB | NFLX | MSFT | |
|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 2018-01-08 | 1.018172 | 1.011943 | 1.061881 | 0.959968 | 1.053526 | 1.015988 |
| 2 | 2018-01-15 | 1.032008 | 1.019771 | 1.053240 | 0.970243 | 1.049860 | 1.020524 |
| 3 | 2018-01-22 | 1.066783 | 0.980057 | 1.140676 | 1.016858 | 1.307681 | 1.066561 |
| 4 | 2018-01-29 | 1.008773 | 0.917143 | 1.163374 | 1.018357 | 1.273537 | 1.040708 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 100 | 2019-12-02 | 1.216280 | 1.546914 | 1.425061 | 1.075997 | 1.463641 | 1.720717 |
| 101 | 2019-12-09 | 1.222821 | 1.572286 | 1.432660 | 1.038855 | 1.421496 | 1.752239 |
| 102 | 2019-12-16 | 1.224418 | 1.596800 | 1.453455 | 1.104094 | 1.604362 | 1.784896 |
| 103 | 2019-12-23 | 1.226504 | 1.656000 | 1.521226 | 1.113728 | 1.567170 | 1.802472 |
| 104 | 2019-12-30 | 1.213014 | 1.678000 | 1.503360 | 1.098475 | 1.540883 | 1.788185 |
105 rows × 7 columns
# Reshape the wide table (one column per stock) into long form:
# index by date, stack the stock columns into rows, then flatten the index.
new = stocks.set_index('date').stack().reset_index()
new
# The reshaped columns are named 'level_1' and 0 at this point; give them
# descriptive names.
new = new.rename(columns={0: 'Value', 'level_1': 'Type'})
new  # long-form table that plotly express can use directly
| date | Type | Value | |
|---|---|---|---|
| 0 | 2018-01-01 | GOOG | 1.000000 |
| 1 | 2018-01-01 | AAPL | 1.000000 |
| 2 | 2018-01-01 | AMZN | 1.000000 |
| 3 | 2018-01-01 | FB | 1.000000 |
| 4 | 2018-01-01 | NFLX | 1.000000 |
| ... | ... | ... | ... |
| 625 | 2019-12-30 | AAPL | 1.678000 |
| 626 | 2019-12-30 | AMZN | 1.503360 |
| 627 | 2019-12-30 | FB | 1.098475 |
| 628 | 2019-12-30 | NFLX | 1.540883 |
| 629 | 2019-12-30 | MSFT | 1.788185 |
630 rows × 3 columns
# Preview the first rows of the long-form table.
new.head()
| date | Type | Value | |
|---|---|---|---|
| 0 | 2018-01-01 | GOOG | 1.0 |
| 1 | 2018-01-01 | AAPL | 1.0 |
| 2 | 2018-01-01 | AMZN | 1.0 |
| 3 | 2018-01-01 | FB | 1.0 |
| 4 | 2018-01-01 | NFLX | 1.0 |
# YOUR CODE HERE
# Interactive line chart: one coloured line per stock type, with the stock
# name used as the hover title.
fig = px.line(
    new,
    x='date',
    y='Value',
    color='Type',
    hover_name='Type',
)
fig.show()
# YOUR CODE HERE
# Load the tips dataset bundled with plotly express.
df=px.data.tips()
# Display the table.
df
| total_bill | tip | sex | smoker | day | time | size | |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
# Tip against total bill, coloured by sex, in a grid of panels:
# one column per smoker value and one row per meal time.
fig = px.scatter(
    df,
    x='total_bill',
    y='tip',
    color='sex',
    facet_row='time',
    facet_col='smoker',
)
fig.show()
Recreate the barplot below that shows the population of different continents for the year 2007.
Hints:
import plotly.graph_objects as go

# Load the gapminder data and keep only the rows for the year 2007.
df = px.data.gapminder()
df = df.loc[df['year'] == 2007]
df
| country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num | |
|---|---|---|---|---|---|---|---|---|
| 11 | Afghanistan | Asia | 2007 | 43.828 | 31889923 | 974.580338 | AFG | 4 |
| 23 | Albania | Europe | 2007 | 76.423 | 3600523 | 5937.029526 | ALB | 8 |
| 35 | Algeria | Africa | 2007 | 72.301 | 33333216 | 6223.367465 | DZA | 12 |
| 47 | Angola | Africa | 2007 | 42.731 | 12420476 | 4797.231267 | AGO | 24 |
| 59 | Argentina | Americas | 2007 | 75.320 | 40301927 | 12779.379640 | ARG | 32 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1655 | Vietnam | Asia | 2007 | 74.249 | 85262356 | 2441.576404 | VNM | 704 |
| 1667 | West Bank and Gaza | Asia | 2007 | 73.422 | 4018332 | 3025.349798 | PSE | 275 |
| 1679 | Yemen, Rep. | Asia | 2007 | 62.698 | 22211743 | 2280.769906 | YEM | 887 |
| 1691 | Zambia | Africa | 2007 | 42.384 | 11746035 | 1271.211593 | ZMB | 894 |
| 1703 | Zimbabwe | Africa | 2007 | 43.487 | 12311143 | 469.709298 | ZWE | 716 |
142 rows × 8 columns
# Keep only the two columns needed for the chart: continent and population.
a = df[['continent', 'pop']]
a
| continent | pop | |
|---|---|---|
| 11 | Asia | 31889923 |
| 23 | Europe | 3600523 |
| 35 | Africa | 33333216 |
| 47 | Africa | 12420476 |
| 59 | Americas | 40301927 |
| ... | ... | ... |
| 1655 | Asia | 85262356 |
| 1667 | Asia | 4018332 |
| 1679 | Asia | 22211743 |
| 1691 | Africa | 11746035 |
| 1703 | Africa | 12311143 |
142 rows × 2 columns
# Sum the population per continent; `b` is indexed by continent.
b=a.groupby('continent').sum()
# Turn the continent index back into a regular column so it can be referenced by name.
c=b.reset_index()
# Display the aggregated table.
c
| continent | pop | |
|---|---|---|
| 0 | Africa | 929539692 |
| 1 | Americas | 898871184 |
| 2 | Asia | 3811953827 |
| 3 | Europe | 586098529 |
| 4 | Oceania | 24549947 |
# YOUR CODE HERE
# Horizontal bar chart of the 2007 population per continent. Each continent
# gets its own colour and the population value is printed on the bar.
fig = px.bar(c, x='pop', y='continent', color='continent', text='pop')

# Apply all styling before rendering; updates made after fig.show() do not
# appear in the figure that was already displayed.
# Order the bars by their numerical value.
fig.update_yaxes(categoryorder='total ascending')
# Place the value labels outside the bars.
fig.update_traces(textposition='outside', selector=dict(type='bar'))

fig.show()